In [1]:
from ggplot import *
import pandas as pd
import numpy as np

Datasets


In [2]:
meat.tail()


Out[2]:
date beef veal pork lamb_and_mutton broilers other_chicken turkey
822 2012-07-01 00:00:00 2200.8 9.5 1721.8 12.5 3127.0 43.4 497.2
823 2012-08-01 00:00:00 2367.5 10.1 1997.9 14.2 3317.4 51.0 530.1
824 2012-09-01 00:00:00 2016.0 8.8 1911.0 12.5 2927.1 43.7 453.1
825 2012-10-01 00:00:00 2343.7 10.3 2210.4 14.2 3335.0 43.8 579.9
826 2012-11-01 00:00:00 2206.6 10.1 2078.7 12.4 3006.7 37.5 515.3

In [3]:
diamonds.head()


Out[3]:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75

In [4]:
pageviews.head()


Out[4]:
date_hour pageviews
0 2013-02-11 21:00:00 8860.982383
1 2013-02-11 22:00:00 8637.474753
2 2013-02-11 23:00:00 9020.593099
3 2013-02-12 00:00:00 8437.500380
4 2013-02-12 01:00:00 9157.399672

In [5]:
mtcars.head()


Out[5]:
name mpg cyl disp hp drat wt qsec vs am gear carb
0 Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
1 Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
2 Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
3 Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
4 Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2

ggplot


In [6]:
aes(x='wt', y='mpg')


Out[6]:
{'y': 'mpg', 'x': 'wt'}

In [7]:
ggplot(aes(x='wt', y='mpg'))


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-7-4c191f620be2> in <module>()
----> 1 ggplot(aes(x='wt', y='mpg'))

TypeError: __init__() takes exactly 3 arguments (2 given)

In [8]:
p = ggplot(mtcars, aes(x='wt', y='mpg'))

In [9]:
p = ggplot(aes(x='wt', y='mpg'), data=mtcars)

This is blank because we didn't pass the plot any "layers"


In [10]:
p


Out[10]:
<ggplot: (281312861)>

Let's add some points...


In [11]:
p + geom_point()


Out[11]:
<ggplot: (281840941)>

The geoms

You can see all of them here.


In [12]:
p + geom_point()


Out[12]:
<ggplot: (277318757)>

geom_line


In [13]:
ggplot(aes(x='date', y='beef'), data=meat) + geom_line()


Out[13]:
<ggplot: (281848029)>

geom_step


In [14]:
df = pd.DataFrame({"x": range(100)})
df['y'] = np.random.choice([-1, 1], 100)
df.y = df.y.cumsum()

In [15]:
ggplot(aes(x='x', y='y'), data=df) + geom_step()


Out[15]:
<ggplot: (281895749)>

geom_histogram


In [16]:
ggplot(aes(x='carat'), data=diamonds) + geom_histogram()


binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
Out[16]:
<ggplot: (281900621)>

geom_density


In [17]:
df = pd.DataFrame(dict(x=np.random.normal(0, 1, 10000)))
ggplot(df, aes(x='x')) + geom_density()


Out[17]:
<ggplot: (281459473)>

Combining Geoms


In [18]:
ggplot(aes(x='date', y='beef'), data=meat) +\
    geom_point(color='red', alpha=0.3) +\
    geom_line()


Out[18]:
<ggplot: (282659613)>

In [19]:
ggplot(aes(x='wt', y='mpg'), data=mtcars) +\
    geom_point() +\
    geom_abline(color='blue', slope=-5, intercept=40)


Out[19]:
<ggplot: (281458785)>

In [20]:
ggplot(aes(x='wt', y='mpg'), data=mtcars) +\
    geom_point(color="blue", size=200) +\
    geom_point(color="white", size = 100) +\
    geom_point(color="red", size = 25)


Out[20]:
<ggplot: (281900649)>

In [21]:
ggplot(aes(x='wt', y='mpg'), data=mtcars) +\
    geom_point(color="blue", size=200) +\
    geom_point(color="white", size = 100) +\
    geom_point(color="red", size = 25) +\
    geom_vline(x=4.5)


Out[21]:
<ggplot: (281896157)>

In [22]:
ggplot(aes(x='wt', y='mpg'), data=mtcars) +\
    geom_point(color="blue", size=200) +\
    geom_point(color="white", size = 100) +\
    geom_point(color="red", size = 25) +\
    geom_hline(y=12.5) +\
    geom_vline(x=4.5)


Out[22]:
<ggplot: (278820309)>